This document will try to approach Ferrari’s historical analysis from a quantitative perspective, it is meant to be a complementary document to the case write app & excel worksheet submitted by Team-7.
# transform data into the correct format
df_tall[, c("Parameter", "Company", "Year", "Ticker", "Group")] <- lapply(df_tall[, c("Parameter", "Company", "Year", "Ticker", "Group")], as.factor)
# drop post analysis column
df_tall$Is.Leading.Indicator <- NULL
df_tall$Comparable.Class <- NULL
# show the final data format
str(df_tall)## 'data.frame': 540 obs. of 6 variables:
## $ Parameter: Factor w/ 12 levels "AP2Revenues",..: 10 10 10 10 10 10 10 10 10 10 ...
## $ Company : Factor w/ 15 levels "BMW","Burberry Group plc",..: 5 5 5 6 6 6 13 13 13 1 ...
## $ Year : Factor w/ 3 levels "2013","2014",..: 1 2 3 1 2 3 1 2 3 1 ...
## $ Value : num 0.0682 0.0604 0.043 0.0647 0.0353 ...
## $ Ticker : Factor w/ 15 levels "BMWYY","BRBY",..: 12 12 12 6 6 6 14 14 14 1 ...
## $ Group : Factor w/ 3 levels "Auto","Luxury",..: 3 3 3 1 1 1 1 1 1 1 ...
Wrangle data in preparation for the clustering analysis
# spread data around year
df <- spread(df_tall, key = Year, value = Value)
# drop company name, group , and year 2015
df$Company <- NULL
df$Group <- NULL
df$`2015` <- NULL
# split the df into a df_list around parameters while dropping the parameter column
dfl <- split(df, f = df$Parameter)
dfl <- lapply(dfl, function(x) subset(x, select = -Parameter))
summary(dfl)## Length Class Mode
## AP2Revenues 3 data.frame list
## AR2Revenues 3 data.frame list
## CapitalTurnover 3 data.frame list
## Cash2Revenues 3 data.frame list
## COGS2Revenue 3 data.frame list
## EBITMargin 3 data.frame list
## Inventories2Revenue 3 data.frame list
## NPPE2Revenue 3 data.frame list
## PrepaidExpenses2Revenues 3 data.frame list
## ROIC 3 data.frame list
## SGA2Revenue 3 data.frame list
## WCR2Revenues 3 data.frame list
str(dfl$AP2Revenues)## 'data.frame': 15 obs. of 3 variables:
## $ Ticker: Factor w/ 15 levels "BMWYY","BRBY",..: 1 2 3 4 12 6 5 7 8 11 ...
## $ 2013 : num 0.1014 0.0591 0.077 0.0473 0.2081 ...
## $ 2014 : num 0.098 0.0748 0.0784 0.0478 0.1939 ...
# use ticker name as row column for each df in dfl & drop ticker column
for (item in 1:length(dfl)) {
rownames(dfl[[item]]) <- dfl[[item]]$Ticker
dfl[[item]]$Ticker <- NULL
}
head(dfl$EBITMargin, 7)## 2013 2014
## BMWYY 0.10499744 0.11340655
## BRBY 0.21418923 0.19757060
## DDAIF 0.09166653 0.07812308
## EL 0.15334080 0.16984383
## RACE 0.15567750 0.14099104
## FCAU 0.04015143 0.03441905
## F 0.04942993 0.02918649
#build the a clusterization function
build_cluster_map <- function(dataframe, number_of_clusters) {
optimum_cluster <- kmeans(dataframe,
centers = number_of_clusters,
nstart = 25)
cluster_df <- as.data.frame(optimum_cluster$cluster)
colnames(cluster_df)[1] <- "Cluster"
return (
list(
cluster_table = cluster_df,
visualization = fviz_cluster(optimum_cluster, data = dataframe)
)
)
}
analyize_cluster <- function(dataframe, number_of_clusters = 3){
k_max <- 10
# Average Silhouette Width Analysis
p1 <- fviz_nbclust(dataframe, kmeans, k.max = k_max, method = "silhouette") +
theme_minimal() + theme(axis.title.y = element_blank()) +
ggtitle("Average Silhouette Width") +
geom_vline(xintercept = number_of_clusters, linetype = 2, col = "red")
# Total Within Sum of Squares Analysis
p2 <- fviz_nbclust(dataframe, kmeans, k.max = k_max, method = "wss") +
theme_minimal() + theme(axis.title.y = element_blank()) +
ggtitle("Total Within Sum of Squares") +
geom_vline(xintercept = number_of_clusters, linetype = 2, col = "red")
# Gap Statistics (k) Analysis
gap_stat <- clusGap(dataframe, FUN = kmeans, nstart = 25, K.max = k_max, B = 50)
p3 <- fviz_gap_stat(gap_stat) +
theme_minimal() + theme(axis.title.y = element_blank()) +
ggtitle("Gap Statistics (k)") +
geom_vline(xintercept = number_of_clusters, linetype = 2, col = "red")
# Cluster Analysis
cluster_map <- build_cluster_map(dataframe, number_of_clusters)
p4 <- cluster_map$visualization +
theme_minimal() +
ggtitle(paste("Cluster Plot for",number_of_clusters,"Clusters"))
p5 <- ggplot() +
theme_minimal() +
annotation_custom(tableGrob(cluster_map$cluster_table))
ggarrange(
ggarrange(p1, p2, p3, widths = c(1,1), ncol = 3),
ggarrange(p4, p5, widths = c(2,1), ncol = 2),
nrow = 2, labels = c("A","B"), heights = c(2,3)
)
}In this section we will utilize principle components to reduce the 12 financial ratios into two dimensions, we understand that this may cause loss of information due the reduction method, but our hypothesis is that performing clustering analysis on the principle components may reveal association between Ferrari & other companies that we can’t see by analyzing individual financial ratio.
# spread data around Parameter
dfw <- pivot_wider(df_tall, names_from = Parameter, values_from = Value)
# drop company name, group , and year 2015
dfw$Company <- NULL
dfw$Group <- NULL
dfw <- subset(dfw, Year != 2015)
dfw$Year <- factor(dfw$Year, levels = c(2013, 2014))
dfw <- as.data.frame(dfw)
# split the df into a df_list around Year while dropping the Year column
dfwl <- split(dfw, f = dfw$Year)
dfwl <- lapply(dfwl, function(x) subset(x, select = -Year))
summary(dfwl)## Length Class Mode
## 2013 13 data.frame list
## 2014 13 data.frame list
head(dfwl$'2013', 7)## Ticker ROIC EBITMargin CapitalTurnover COGS2Revenue SGA2Revenue
## 1 RACE 0.06820394 0.15567750 0.7065015 0.4129685 0.11128478
## 3 FCAU 0.06465148 0.04015143 1.2272602 0.8641429 0.07825624
## 5 TSLA 0.04775863 0.04978902 0.9846389 0.7733981 0.14182745
## 7 BMWYY 0.04247918 0.10499744 0.5958915 0.7992611 0.09541277
## 9 VLKAF 0.02738411 0.05924155 0.6431643 0.8175395 0.13927424
## 11 DDAIF 0.05921053 0.09166653 0.7434841 0.7876201 0.11978946
## 13 TM 0.02805701 0.06465752 0.7075484 0.8448528 0.09528149
## NPPE2Revenue Cash2Revenues AR2Revenues Inventories2Revenue
## 1 0.2431410 0.3415023 0.45763445 0.10170130
## 3 0.2748492 0.2334911 0.03009582 0.12158997
## 5 0.5567029 0.4201096 0.02438992 0.16903684
## 7 0.5401333 0.1739439 0.31488713 0.12615207
## 9 0.3281508 0.1942114 0.05651068 0.14544153
## 11 0.4232764 0.1394535 0.06613721 0.14704785
## 13 0.3105139 0.1482338 0.32130427 0.07776337
## PrepaidExpenses2Revenues WCR2Revenues AP2Revenues
## 1 0.00371263 2.084257 0.20807016
## 3 0.00522891 1.479959 0.20356087
## 5 0.01369459 1.875021 0.15096578
## 7 0.00000000 1.020534 0.10139497
## 9 0.00000000 1.030070 0.09148913
## 11 0.00250886 1.191734 0.07701175
## 13 0.02388639 1.067560 0.09580129
# build row name & scale all values
for (item in 1:length(dfwl)) {
rownames(dfwl[[item]]) <- dfwl[[item]]$Ticker
# use ticker name as row column for each df in dfl & drop ticker column
dfwl[[item]]$Ticker <- NULL
for (column in 1:length(dfwl[[item]])) {
dfwl[[item]][[column]] <- scale(dfwl[[item]][[column]])
}
}
head(dfwl$'2013', 7)## ROIC EBITMargin CapitalTurnover COGS2Revenue SGA2Revenue
## RACE -0.2608454 0.3336409 -0.4813249 -0.6379818 -0.6645583
## FCAU -0.3091011 -1.0662105 0.8919971 1.0838551 -0.8402640
## TSLA -0.5385692 -0.9494300 0.2521669 0.7375417 -0.5020769
## BMWYY -0.6102839 -0.2804590 -0.7730206 0.8362438 -0.7489944
## VLKAF -0.8153315 -0.8348918 -0.6483551 0.9060004 -0.5156595
## DDAIF -0.3830095 -0.4419922 -0.3837961 0.7918179 -0.6193149
## TM -0.8061910 -0.7692655 -0.4785639 1.0102375 -0.7496928
## NPPE2Revenue Cash2Revenues AR2Revenues Inventories2Revenue
## RACE -0.515177778 1.5429326 2.5433476 -0.68965509
## FCAU -0.274048925 0.2983106 -0.7795861 -0.35624757
## TSLA 1.869347126 2.4487315 -0.8239337 0.43913708
## BMWYY 1.743341656 -0.3878563 1.4338809 -0.27976994
## VLKAF 0.131290655 -0.1543127 -0.5742834 0.04359260
## DDAIF 0.854687022 -0.7852925 -0.4994637 0.07052045
## TM -0.002831248 -0.6841156 1.4837565 -1.09094314
## PrepaidExpenses2Revenues WCR2Revenues AP2Revenues
## RACE -0.3978348 1.1257034 1.9987086
## FCAU -0.2826572 -0.1465036 1.9041269
## TSLA 0.3604017 0.6852055 0.8009516
## BMWYY -0.6798487 -1.1137156 -0.2387898
## VLKAF -0.6798487 -1.0936413 -0.4465635
## DDAIF -0.4892740 -0.7532951 -0.7502247
## TM 1.1345778 -1.0147139 -0.3561165
df_to_analyze <- dfwl$'2013'
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)df_to_analyze <- dfwl$'2014'
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 7)
The two graphs above highlights that RACE is creating its unique
clusters for both 2013/2014, yet we can notice the adjacency of RACE to
the the cluster of
PPRUY, LRCY, LVMUY and the cluster of
HMC, BMWYY, VLKAF, DDAIF, TM. we will try to uncover that
relationship in the next section by limiting the parameters of the
principle components analysis.
In this section we will rerun the principle components analysis on ROIC, EBIT Margin, and Capital Turnover parameters only.
# build a limited data frame
dfwll <- dfwl
# build row name & scale all values
for (item in 1:length(dfwll)) {
dfwll[[item]] <- select(dfwll[[item]],
-c(COGS2Revenue,
SGA2Revenue,
NPPE2Revenue,
Cash2Revenues,
AR2Revenues,
Inventories2Revenue,
PrepaidExpenses2Revenues,
WCR2Revenues,
AP2Revenues
)
)
}
head(dfwll$'2013', 7)## ROIC EBITMargin CapitalTurnover
## RACE -0.2608454 0.3336409 -0.4813249
## FCAU -0.3091011 -1.0662105 0.8919971
## TSLA -0.5385692 -0.9494300 0.2521669
## BMWYY -0.6102839 -0.2804590 -0.7730206
## VLKAF -0.8153315 -0.8348918 -0.6483551
## DDAIF -0.3830095 -0.4419922 -0.3837961
## TM -0.8061910 -0.7692655 -0.4785639
df_to_analyze <- dfwll$'2013'
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)df_to_analyze <- dfwll$'2014'
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)
The analysis above suggests that principle components cauterization over
ROIC, EBIT Margin, and Capital Turnover suggests that RACE are in the
same cluster of
LVMUY, PPRUY, LRLCY with proximity to the
cluster of BMWYY, VLKAF, DDAIF, TM, HMC.
df_to_analyze <- dfl$AP2Revenues
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 4)df_to_analyze <- dfl$AR2Revenues
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 3)df_to_analyze <- dfl$CapitalTurnover
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)df_to_analyze <- dfl$Cash2Revenues
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 4)df_to_analyze <- dfl$EBITMargin
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)df_to_analyze <- dfl$Inventories2Revenue
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)df_to_analyze <- dfl$NPPE2Revenue
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 4)df_to_analyze <- dfl$PrepaidExpenses2Revenues
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)df_to_analyze <- dfl$ROIC
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)df_to_analyze <- dfl$SGA2Revenue
# identify the optimum cluster size
analyize_cluster(df_to_analyze,6 )df_to_analyze <- dfl$WCR2Revenues
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 3)